package data.preprocessing;

/**
 * Replaces a fixed set of accented letters with their unaccented lowercase
 * equivalents and replaces punctuation marks (every character that is not an
 * ASCII letter, digit or underscore) with a space.
 * 
 * @author guillaumes
 * 
 */
public class StringProcessor_RemovePunctuation implements StringProcessor {

	/**
	 * Folds accented letters to plain ASCII and blanks out punctuation.
	 * <p>
	 * Each code point of the input is handled independently:
	 * <ul>
	 * <li>a, e, i, o, u carrying a grave, circumflex or diaeresis accent, e
	 * with an acute accent and c with a cedilla (lower or upper case) become
	 * the corresponding unaccented <em>lowercase</em> letter;</li>
	 * <li>ASCII letters, ASCII digits and the underscore are kept as is;</li>
	 * <li>anything else (punctuation, whitespace, other non-ASCII characters)
	 * becomes a single space.</li>
	 * </ul>
	 * The result therefore always has as many characters as the input has
	 * code points.
	 * 
	 * @param from
	 *            the text to clean
	 * @return the cleaned text
	 * @throws NullPointerException
	 *             if {@code from} is {@code null}
	 */
	public String map(String from) {
		final int length = from.length();
		StringBuilder cleaned = new StringBuilder(length);
		// Walk by code point, not by char, so that a surrogate pair yields a
		// single space instead of two.
		for (int index = 0; index < length;) {
			int codePoint = from.codePointAt(index);
			index += Character.charCount(codePoint);
			cleaned.append(mapCodePoint(codePoint));
		}
		return cleaned.toString();
	}

	/**
	 * Maps one code point to its replacement character.
	 * <p>
	 * Accented letters are given as numeric code points so the class does not
	 * depend on the encoding the compiler uses to read this file.
	 */
	private static char mapCodePoint(int codePoint) {
		switch (codePoint) {
		// a with grave, circumflex, diaeresis (lower case, then upper case)
		case 0x00E0: case 0x00E2: case 0x00E4:
		case 0x00C0: case 0x00C2: case 0x00C4:
			return 'a';
		// e with acute, grave, circumflex, diaeresis
		case 0x00E9: case 0x00E8: case 0x00EA: case 0x00EB:
		case 0x00C9: case 0x00C8: case 0x00CA: case 0x00CB:
			return 'e';
		// i with circumflex, diaeresis
		case 0x00EE: case 0x00EF:
		case 0x00CE: case 0x00CF:
			return 'i';
		// o with circumflex, diaeresis
		case 0x00F4: case 0x00F6:
		case 0x00D4: case 0x00D6:
			return 'o';
		// u with grave, circumflex, diaeresis
		case 0x00F9: case 0x00FB: case 0x00FC:
		case 0x00D9: case 0x00DB: case 0x00DC:
			return 'u';
		// c with cedilla
		case 0x00E7:
		case 0x00C7:
			return 'c';
		default:
			// Safe narrowing: an ASCII word character always fits in a char.
			return isAsciiWordCharacter(codePoint) ? (char) codePoint : ' ';
		}
	}

	/** Tells whether the code point is an ASCII letter, an ASCII digit or '_'. */
	private static boolean isAsciiWordCharacter(int codePoint) {
		return (codePoint >= 'a' && codePoint <= 'z')
				|| (codePoint >= 'A' && codePoint <= 'Z')
				|| (codePoint >= '0' && codePoint <= '9')
				|| codePoint == '_';
	}

	/**
	 * Manual check: prints the mapping of a sample containing an underscore,
	 * which is kept because it is not treated as punctuation.
	 */
	public static void main(String[] args) {
		String text = "numerical_number";
		StringProcessor sp = new StringProcessor_RemovePunctuation();
		System.out.println(sp.map(text));
	}
}
